*1 Prepare Data for Matlab Rho Estimation
*************************************************
use "data_&_theta.dta", clear
	gen beta_l1	=	beta1_l
	gen beta_m1	=	beta1_m
	gen beta_k1	=	beta1_k
	gen beta_l2	=	beta1_ll
	gen beta_m2	=	beta1_mm
	gen beta_k2	=	beta1_kk
	gen beta_lm	=	beta1_lm
	gen beta_lk	=	beta1_lk
	gen beta_mk	=	beta1_mk
	gen beta_lmk	=	beta1_lmk
		
	qui tab UNIT2, g(_UNIT2_fe)
	
	*Get Eq separately by pnic2
	levels pnic2, local(plist)
		qui foreach i of local plist {
			preserve
			keep if pnic2==`i'
*** MTB: CHANGE IN ORIGINAL CODE: TARRIFS COMMENTED OUT AND ADDITIONAL REGRESSORS ADDED			
			xi: areg q_j p_j* l_s* m1* k_s* /*tau_in* tau_out**/ i.year i.cl_pnic i.pcode12 exp_p dest_p dest_p_sq treated_p exp dest dest_sq treated , a(UNIT2)
			predict Eq, xb
			saveold tmp_Eq_`i',replace
			restore
			}
			
	use ${empty}/empty,clear
		foreach i of local plist {
		append using tmp_Eq_`i'
			erase tmp_Eq_`i'.dta
			}
		drop if year==.

	*compute omega
	egen pnic2_UNIT2 = group(pnic2 UNIT2)
	g f1 = k*beta_k1 - wfjt*beta_k1 + k^2*beta_k2 - 2*k*wfjt*beta_k2 + wfjt^2*beta_k2 + l*beta_l1 - wfjt*beta_l1 + k*l*beta_lk - k*wfjt*beta_lk - l*wfjt*beta_lk + wfjt^2*beta_lk + l^2*beta_l2 - 2*l*wfjt*beta_l2 + wfjt^2*beta_l2 + l*m*beta_lm - l*wfjt*beta_lm - m*wfjt*beta_lm + wfjt^2*beta_lm + k*l*m*beta_lmk - k*l*wfjt*beta_lmk - k*m*wfjt*beta_lmk - l*m*wfjt*beta_lmk + k*wfjt^2*beta_lmk + l*wfjt^2*beta_lmk + m*wfjt^2*beta_lmk - wfjt^3*beta_lmk + m*beta_m1 - wfjt*beta_m1 + k*m*beta_mk - k*wfjt*beta_mk - m*wfjt*beta_mk + wfjt^2*beta_mk + m^2*beta_m2 - 2*m*wfjt*beta_m2 + wfjt^2*beta_m2
	g omega_fjt_x = Eq - f1
			drop f1
		areg omega_fjt_x, a(pnic2_UNIT2)	
			predict omega_fjt_xr,r			
			drop omega_fjt_x

	*generate coefficients on rho's
	g a_ft = (beta_k1 + beta_l1 + 3*wfjt^2*beta_lmk + l*(beta_lk + 2*beta_l2 + beta_lm + k*beta_lmk + m*beta_lmk - 2*wfjt*beta_lmk) + beta_m1 + k*(2*beta_k2 + beta_lk + m*beta_lmk - 2*wfjt*beta_lmk + beta_mk) + wfjt*(-2*beta_k2 - 2*beta_lk - 2*beta_l2 - 2*beta_lm - 2*beta_mk - 2*beta_m2) + m*(beta_lm - 2*wfjt*beta_lmk + beta_mk + 2*beta_m2))
	g b_ft = (beta_k2 + beta_lk + beta_l2 + beta_lm + k*beta_lmk + l*beta_lmk + m*beta_lmk - 3*wfjt*beta_lmk + beta_mk + beta_m2)
	g c    = beta_lmk

	egen tmp = group(year fid)
		bys tmp: g NN = _N
		drop if NN == 1
		drop NN
	
	*prepare for matlab
	keep fup12 fid year a_ft b_ft c sv omega_fjt_xr
	sort fup12 year
	egen fid2yr = group(year fid)
	sort fid2yr
	by fid2yr : g N = _N
	by fid2yr : egen totsv = sum(sv)
	g svshare = sv/totsv
		drop sv totsv

	*if omega is zero, make all the a's b's and c's missing
		foreach v of varlist a_ft b_ft c sv omega_fjt_xr {
			replace `v' = 0 if `v' == .
			}
	sort fid2yr
	gsort fid2yr -sv
	xtile iter = fid2yr, nq(30)
	compress
	order fup12 fid year a_ft b_ft c sv N fid2yr omega_fjt_xr iter
	saveold tmp_matlab,replace

*2 Rho Recovery [Matlab]
*************************************************
	use tmp_matlab,clear
		erase tmp_matlab.dta
	sum fid2yr
		global K2 = r(max)
		noi dis ${K2}

		!rm -rf "${worklocal}/dump"
		cap mkdir "${worklocal}/dump"

	*copy matlab files to a dump directory
	outsheet using "${worklocal}/dump/matlab_raw.csv",comma replace nonames

	cd ${code}/matlab/
		!cp -r * "${worklocal}/dump/"

	cd "${worklocal}/dump"

	*run matlab2012a on columbia's research grid.
	noi dis "> ${dirnum} - matlab_start"
*	!oldmatlab --grid_submit=batch --grid_ncpus=4 --grid_mem=32G "-r control11"
	shell matlab -noFigureWindows -r "try; run('control11.m'); catch; end; quit;"
	
	sleep 30000

	capture confirm file lock.out
		while _rc {
		capture confirm file lock.out
		sleep 30000
		}
	noi dis "> ${dirnum} - matlab_end"
	cap mkdir ${worklocal}/rhos

	do ${code}/03a_processrhos.do
	cd ${worklocal}

*3 Generate Markups, Prepare for Estimations
*************************************************
use "data_&_theta.dta", clear
	*translog coefficients
		gen beta_l1	=	beta1_l
		gen beta_m1	=	beta1_m
		gen beta_k1	=	beta1_k
		gen beta_l2	=	beta1_ll
		gen beta_m2	=	beta1_mm
		gen beta_k2	=	beta1_kk
		gen beta_lm	=	beta1_lm
		gen beta_lk	=	beta1_lk
		gen beta_mk	=	beta1_mk
		gen beta_lmk	=	beta1_lmk
			drop beta1*
/*
	*translog-no input correction coefficients
		gen bet2a2_l1	=	beta2_l
		gen bet2a2_m1	=	beta2_m
		gen bet2a2_k1	=	beta2_k
		gen bet2a2_l2	=	beta2_ll
		gen bet2a2_m2	=	beta2_mm
		gen bet2a2_k2	=	beta2_kk
		gen bet2a2_lm	=	beta2_lm
		gen bet2a2_lk	=	beta2_lk
		gen bet2a2_mk	=	beta2_mk
		gen bet2a2_lmk	=	beta2_lmk
			drop beta2*
			rename bet2a2* beta2*

	*tranlog-no selection correction coefficients
		gen bet3a3_l1	=	beta3_l
		gen bet3a3_m1	=	beta3_m
		gen bet3a3_k1	=	beta3_k
		gen bet3a3_l2	=	beta3_ll
		gen bet3a3_m2	=	beta3_mm
		gen bet3a3_k2	=	beta3_kk
		gen bet3a3_lm	=	beta3_lm
		gen bet3a3_lk	=	beta3_lk
		gen bet3a3_mk	=	beta3_mk
		gen bet3a3_lmk	=	beta3_lmk
			drop beta3*
			rename bet3a3* beta3*
*/			
			cap drop rho

	*merge in rhos
		sort fup12 year
		merge fup12 year using rhos, update
			replace exit_x     = 1 if n==1
			replace rho_x      = 1 if n==1
			drop _m
			
	*get revenue share
		bys fid year: egen totsv = sum(sv)
		g svshare = sv/totsv
		drop totsv
		g lsv = log(sv)

	* compute markups:
		g lxmfgshare = log(xmfgshare)
			g c_x           = rho_x
			g M_fjt_x       = c_x*(s110*xmfgshare)
			g L_fjt_x       = c_x*(s121)
			g K_fjt_x       = c_x*exp(k)
			gen Wfjt		=	exp(wfjt)

			* calculate physical inputs using Wfjt
			g Mx_fjt_x			= M_fjt_x/Wfjt
			g Lx_fjt_x			= L_fjt_x/Wfjt
			g Kx_fjt_x			= K_fjt_x/Wfjt

			g m_fjt_x       = ln(Mx_fjt_x)
			g l_fjt_x       = ln(Lx_fjt_x)
			g k_fjt_x       = ln(Kx_fjt_x)

			*output elasticities, main specification
			g theta_fjt_x   = beta_m1 + 2*beta_m2*m_fjt_x + beta_lm*l_fjt_x + beta_mk*k_fjt_x + beta_lmk*l_fjt_x*k_fjt_x
			g theta_L_fjt_x = beta_l1 + 2*beta_l2*l_fjt_x + beta_lm*m_fjt_x + beta_lk*k_fjt_x + beta_lmk*m_fjt_x*k_fjt_x
			g theta_K_fjt_x = beta_k1 + 2*beta_k2*k_fjt_x + beta_lk*l_fjt_x + beta_mk*m_fjt_x + beta_lmk*l_fjt_x*m_fjt_x
			g RTS_tl_x      = theta_fjt_x + theta_L_fjt_x + theta_K_fjt_x
/*
			*output elasticities for coefficients without input price correction
			g theta2_fjt_x   = beta2_m1 + 2*beta2_m2*m_fjt_x + beta2_lm*l_fjt_x + beta2_mk*k_fjt_x + beta2_lmk*l_fjt_x*k_fjt_x
			g theta2_L_fjt_x = beta2_l1 + 2*beta2_l2*l_fjt_x + beta2_lm*m_fjt_x + beta2_lk*k_fjt_x + beta2_lmk*m_fjt_x*k_fjt_x
			g theta2_K_fjt_x = beta2_k1 + 2*beta2_k2*k_fjt_x + beta2_lk*l_fjt_x + beta2_mk*m_fjt_x + beta2_lmk*l_fjt_x*m_fjt_x
			g RTS2_tl_x      = theta2_fjt_x + theta2_L_fjt_x + theta2_K_fjt_x
			*output elasticity for coefficients without selection correction
			g theta3_fjt_x    = beta3_m1 + 2*beta3_m2*m_fjt_x + beta3_lm*l_fjt_x + beta3_mk*k_fjt_x + beta3_lmk*l_fjt_x*k_fjt_x
			g theta3_L_fjt_x = beta3_l1 + 2*beta3_l2*l_fjt_x + beta3_lm*m_fjt_x + beta3_lk*k_fjt_x + beta3_lmk*m_fjt_x*k_fjt_x
			g theta3_K_fjt_x = beta3_k1 + 2*beta3_k2*k_fjt_x + beta3_lk*l_fjt_x + beta3_mk*m_fjt_x + beta3_lmk*l_fjt_x*m_fjt_x
			g RTS3_tl_x      = theta3_fjt_x + theta3_L_fjt_x + theta3_K_fjt_x
*/			
			label var theta_fjt_x "materials output elasticity"
			label var theta_L_fjt_x "labor output elasticity"
			label var theta_K_fjt_x "capital output elasticity"
/*
			label var theta2_fjt_x "materials output elasticity, no input correction"
			label var theta2_L_fjt_x "labor output elasticity, no input correction"
			label var theta2_K_fjt_x "capital output elasticity, no input correction"
			
			label var theta3_fjt_x "materials output elasticity, no selection correction"
			label var theta3_L_fjt_x "labor output elasticity, no selection correction"
			label var theta3_K_fjt_x "capital output elasticity, no selection correction"
*/
*** MTB: CHANGE IN ORIGINAL CODE: USE NOMINAL RATHER THAN DEFLATED MATERIAL EXPENDITURES (s110) WHEN CONSTRUCTING EXPENDITURE SHARE
			g expenditure_m_fjt_x = c_x*s110_nom //s110
			g alpha_M_mpf_x = (expenditure_m_fjt_x*xmfgshare)/sv
			g Markup_M_x    = theta_fjt_x/alpha_M_mpf_x
			g lmu_x = ln(Markup_M_x)
			g lmc_x = p_j - lmu_x
			label var lmu_x "log markup"
			label var lmc_x "log marginal cost"

	*one observation has a missing pnic4, even though the same product reports a pnic4 in a different year
*		replace pnic4 = 2411 if pcode12==100000000000 & pnic4==.

	*Identify Outliers
		tsset fup12 year
		sort  fup12 year
		g luv = log(sv/sq)
		g luv_x = luv
		egen itag = tag(fid year)
		foreach v of varlist luv_x lmu_x lmc_x { 
			foreach i of numlist 3 97 {
				bys pnic2: egen p`i'_`v' = pctile(`v'), p(`i')
				}
			}
		foreach v of varlist luv_x lmu_x lmc_x { 
			g cut3_`v'  = `v'>p3_`v' & `v'<p97_`v'
			drop p3_`v' p97_`v'
			}
		g mu_x = exp(lmu_x)
		
	gsort fid year - sv
	by fid year: g rank = _n
/*	
	*merge in IO Weights in order to create ERP
		sort pnic4 year
		merge pnic4 year using "${ioweight}/pnic4_IOweights_svc1"
			drop if _m==2
			drop _m
			local j = 3
			foreach i of numlist 10 100 {
				sort pnic`j' year
				merge pnic`j' year using "${ioweight}/pnic`j'_IOweights_svc1", update
					drop if _merge==2
					drop _m
					local j = `j' - 1
				}
	g p_erp = (p_tf - p_qt)/(1-IOweights1)
*/
	*drop variables that we don't need for regression results
		drop /**tau**/ m1 m2 m3 *k_s* *l_s* 
		forval i = 1/15 {
			cap drop beta`i'*
			}	
		drop msq msr msr4d /*IOweights**/ share_j mu
		drop exit c_x M_fjt_x L_fjt_x K_fjt_x m_fjt_x l_fjt_x k_fjt_x
		drop rho_x p_j2 r r_j p_j  
		drop Markup_M_x beta* min_x
		cap drop *_TL_dpunit 
		cap drop *_TL
		cap drop PRICE
		cap drop PRICE_mean
		drop gamma* TOTR TOTR4d up2 fup2 up4 fup4 up6 fup6 up8 fup8 up12 delta* single_next lxmfgshare
		drop /*s111*/ s121 s110 fa l k m /*s173 s115*/ pcode2 pcode4 pcode6 pcode8 pnic3
		drop sq UNIT2 /*unit*/ R_j p_ms
		drop cut*_luv*  
		drop Lx_fjt_x Kx_fjt_x Mx_fjt_x
		drop lk lm mk lmk omega_x converge_x p_jmean ms_mean msdif 
		drop xmfgshare /*m_matsonly*/ Wfjt expenditure_m_fjt_x alpha_M_mpf_x wfjt

		label var fid2yr "group(fid2 year)"
		lab var svshare "share of product sales in firm total sales"
		lab var lsv "log sales value"
		lab var luv "log unit value"
		lab var luv_x "log unit value (same as 'luv')"
		lab var mu_x "markup"
		lab var rank "product rank"
		*lab var p_erp "effective rate of protection"
		lab var cut3_lmu_x "Inverse indicator for markup outliers (3rd & 97th percentiles)"
		lab var cut3_lmc_x "Inverse indicator for cost outliers (3rd & 97th percentiles)"
		lab var nrobs1_st "obs from PF estimation"
*		lab var nrobs2_st "obs from PF estimation, no input correction"
*		lab var nrobs3_st "obs from PF estimation, no selection correction"
		lab var RTS_tl_x "returns to scale"
*		lab var RTS2_tl_x "returns to scale, no input correction"
*		lab var RTS3_tl_x "returns to scale, no selection correct"
	compress
saveold estimationfile,replace




